set seed 5555 
clear
set more off	

***Generate input datasets from source files

//Extract project level variables of interest
use "../Data/Indonesia/all_project_table.dta", clear 
gen z_line_id = _n 
preserve
keep if state_name != "" & state_num != .
keep state_name state_num
bys state_name state_num: keep if _n == 1
sort state_num
tempfile tempstate
save `tempstate', replace
restore

replace state_name = "" if state_name == " "
sort state_num
drop _merge
merge state_num using `tempstate', uniqusing update
assert state_name != "" if state_num != .

gen prequal_dummy = 0 if bid_type == "Pelelangan Umum Pasca Kualifikasi" 
replace prequal_dummy = 1 if (bid_type == "Pelelangan Umum Pra Kualifikasi" | bid_type == "Seleksi Umum" | bid_type == "Pemilihan Langsung" | bid_type == "Penunjukkan Langsung" | bid_type == "Seleksi Langsung" | bid_type == "Seleksi Terbatas" | bid_type == "Pelelangan Terbatas")

drop if state_num == .
drop if bid_type == ""
drop if prequal_dummy == .  

gen year_cut = substr(uniq_pid,1,4)
destring(year_cut), replace
replace year = year_cut if year == .



*************************************************************
//Merging variables of interest with firm level dataset *****
*************************************************************

keep uniq_pid prequal_dummy konsultan bid_type proc_type all_pd year bid_type ba_an_ppt_date penun_det_tanggal_surat newsp_pub_date penun_det_ci_prov_cd bidang_pekerjaan z_line_id
sort uniq_pid 

merge uniq_pid using "../Data/Indonesia/all_firm_table", uniqmaster
drop if _m == 2

***********************************************************************************
//Addressing duplicates
//34 projects with repeat firms within
duplicates tag uniq_pid firm_name, gen(tag)
duplicates tag, gen(full_tag)
bys uniq_pid: egen any_tag=max(tag)
drop if any_tag>=1

**Searching for indicators of mis-scraped actual duplicates (not caught by duplicates command b/c contain "1." etc in front)
//Dropping projects with false firms based on name
gen firm_cut = substr(firm_name, 1,2)
gen firm_cut_ind = 1 if firm_cut == "1." | firm_cut == "2." | firm_cut == "3."| firm_cut == "4."| firm_cut == "5." | firm_cut == "6." | firm_cut == "7." | firm_cut == "8." | firm_cut == "9."
replace firm_cut_ind = 1 if firm_cut == "1 " | firm_cut == "2 " | firm_cut == "3 "| firm_cut == "4 "| firm_cut == "5 " | firm_cut == "6 " | firm_cut == "7 " | firm_cut == "8 " | firm_cut == "9 "
bys uniq_pid: egen firm_cut_any = max(firm_cut_ind)
**Keep project, drop the "1." type duplicate firms
drop if firm_cut_ind == 1
 

**********************************************************************************************************************

**************************************************************************************
****Confirming prequal/has_pnp_val/n_pq_firms_w_pnp connections
**************************************************************************************

**has_pnp_val is at firm level, create dummy variable at project level 
bys uniq_pid: egen has_pnp_max = max(has_pnp_val)

//Recoding mislisted bid_types pasca_k as pre_k
replace bid_type = "Pelelangan Umum Pra Kualifikasi" if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_passed_firms > 0 & prequal_dummy == 0
replace prequal_dummy = 1 if bid_type == "Pelelangan Umum Pra Kualifikasi" & prequal_dummy == 0

replace ppq2_ent_fin_passed = . if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0
replace has_pnp_val = 0 if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0
replace has_pnp_max = 0 if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0
replace n_pq_firms_w_pnp = 0 if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0
replace pct_firms_passed = . if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0
replace n_not_passed_firms = 0 if bid_type == "Pelelangan Umum Pasca Kualifikasi" & n_not_passed_firms > 0


********
//Dropping few Pemilihan Langsung projects for which data is incorrect
drop if has_pnp_max == 0 & bid_type == "Pemilihan Langsung" & prequal == 1

//Dropping few Seleksi Umum projects for which data is incorrect
drop if has_pnp_max == 0 & bid_type == "Seleksi Umum"

******
//Confirming cases of not having info on the ppq pages but have some pnp value for the project (which requires having info from the ppq pages)
by uniq_pid: egen ppq2_ent_fin_ind_max = max(ppq2_ent_fin_ind)
by uniq_pid: egen ppq2_ent_drf_ind_max = max(ppq2_ent_drf_ind)
by uniq_pid: egen ppq_ent_ind_max = max(ppq_ent_ind)

//Searching for firms without peserta pages when other firms within that project have these pages
gen non_match_ind = 1 if ppq_ent_ind != ppq_ent_ind_max 
by uniq_pid: egen non_match_tot = total(non_match_ind)
//Dropping thse firms because vast majority having missing firm name and therefore most likely instance of false partial duplicates
drop if non_match_ind == 1


*******************************************************************************************
gen year_cut = substr(uniq_pid,1,4)
destring(year_cut), replace
replace year = year_cut if year == .

*******************************************************************************************

//Addressing instances where some firms in a project have P/NP values and others don't
bys uniq_pid: egen any_lulus = max(ppq2_ent_fin_passed)
replace any_lulus = 0 if any_lulus == .
bys uniq_pid: egen any_tidak_lulus = min(ppq2_ent_fin_passed)
replace any_tidak_lulus = 2 if any_tidak_lulus == 0
replace any_tidak_lulus = 0 if any_tidak_lulus == 1 | any_tidak_lulus == .
replace any_tidak_lulus = 1 if any_tidak_lulus == 2


//Replacing values for firms without P/NP information as no pass in projects where some firms have pass or no pass information
//If all entries either Lulus or no information:
replace ppq2_ent_fin_passed = 0 if has_pnp_val == 0 & any_lulus == 1 & any_tidak_lulus == 0
				
//If have entries Lulus, Tidak_Lulus, and no information
replace ppq2_ent_fin_passed = 0 if has_pnp_val == 0 & any_lulus == 1 & any_tidak_lulus == 1

//Checked on specifics of project
replace ppq2_ent_fin_passed = 1 if uniq_pid == "2008_CTI_0FB8EAA4-59D2-445D-848E-10F41A908B8B"
replace ppq2_ent_fin_passed = 0 if firm_name == "SINAR SAMA SEJATI, PT - ANGKASA PURI KONSURSINDO, PT (JO)" & uniq_pid == "2008_CTI_0FB8EAA4-59D2-445D-848E-10F41A908B8B"


//Regenerating to account for corrections
by uniq_pid: egen n_pq_firms_all2 = count(uniq_pid != "")

foreach x of var n_pq_firms_all{
	replace `x' = `x'2
	drop `x'2
}



 
*********************************
//Investigating whether have ppq2 final without ppq2 draft and/or ppq ever (etc.)

**One project is missing ppq or ppq2_final while having ppq_draft: "2006_CTI_3779"
drop if uniq_pid == "2006_CTI_3779"

gen marker = 1 if ppq2_ent_fin_ind == 1 & ppq_ent_ind == 0
replace ppq2_ent_fin_ind = 0 if marker == 1
replace ppq_ent_ind = 1 if marker == 1
drop marker

***Website data issues with these few projects
drop if ppq_ent_ind == 0



*********************************************************************************************

//Checking final_bid values

gen fin_bid_over_pd = final_bid / all_pd

bys uniq_pid: egen final_bid_max = max(final_bid)
gen fin_bid_over_pd_max = final_bid_max / all_pd


//Making corrections based on specific project investigations on website previously
replace final_bid = final_bid / 1000 if uniq_pid == "2005_CTI_597" & fin_bid_over_pd_max >= 1000
replace final_bid_max = final_bid_max / 1000 if uniq_pid == "2005_CTI_597" & fin_bid_over_pd_max >= 1000
replace fin_bid_over_pd = final_bid / all_pds if uniq_pid == "2005_CTI_597" & fin_bid_over_pd_max >= 1000
replace fin_bid_over_pd_max = final_bid_max / all_pds if uniq_pid == "2005_CTI_597" & fin_bid_over_pd_max >= 1000

gen final_bid_alt1 = pemlel_fin_harga_rp
gen final_bid_alt2 = pkb_ent_nilai_penawaran_rp

gen final_bid_ratio1 = final_bid / pemlel_fin_harga_rp
gen final_bid_ratio2 = final_bid / pkb_ent_nilai_penawaran_rp

gen final_too_big = 3 if uniq_pid == "2005_CTI_1608" | uniq_pid == "2005_CTI_1610" | uniq_pid == "2007_CTI_2529"  
gen PD_too_small = 3 if uniq_pid == "2005_CTI_3075" | uniq_pid == "2006_CTI_4353" | uniq_pid == "2007_CTI_2381" | uniq_pid == "2004_CTI_10549" | uniq_pid == "2004_CTI_9489"

label var final_too_big "Indicator for original scraped final bid value being too large number signifies number of 0's too big"
label var PD_too_small  "Indicator for original scraped pagu dana value being too small number signfies number of 0's too small"

replace final_bid = final_bid / 1000 if final_too_big == 3 & fin_bid_over_pd_max > 600
replace final_bid_max = final_bid_max / 1000 if final_too_big == 3 & fin_bid_over_pd_max > 600 
replace fin_bid_over_pd = final_bid / all_pds if final_too_big == 3 & fin_bid_over_pd_max > 600
replace fin_bid_over_pd_max = final_bid_max / all_pds if final_too_big == 3 & fin_bid_over_pd_max > 600

replace all_pds = all_pds * 1000 if PD_too_small == 3 & fin_bid_over_pd_max > 600
replace fin_bid_over_pd = final_bid / all_pds if PD_too_small == 3 & fin_bid_over_pd_max > 600
replace fin_bid_over_pd_max = final_bid_max / all_pds if PD_too_small == 3 & fin_bid_over_pd_max > 600


//Dropping project due to implausable combination PD and final_bid values without clear correction
drop if uniq_pid == "2005_CTI_3695"

***2006_CTI_4376 : recode this using earlier final bid info
replace final_bid = final_bid_alt1 if uniq_pid == "2006_CTI_4376" & final_bid != .
bys uniq_pid: egen final_bid_max0 = max(final_bid)
replace final_bid_max = final_bid_max0 if uniq_pid == "2006_CTI_4376"
drop final_bid_max0
replace fin_bid_over_pd = final_bid / all_pds if uniq_pid == "2006_CTI_4376"
replace fin_bid_over_pd_max = final_bid_max / all_pds if uniq_pid == "2006_CTI_4376"

replace final_too_big = 2 if uniq_pid == "2004_CTI_8507" | uniq_pid == "2004_CTI_8508" | uniq_pid == "2004_CTI_9893"
replace PD_too_small = 2 if uniq_pid == "2007_CTI_2478"
replace PD_too_small = 1 if uniq_pid == "2006_CTI_4936"

replace final_bid = final_bid / 100 if final_too_big == 2 & fin_bid_over_pd_max > 93
replace final_bid_max = final_bid_max / 100 if final_too_big == 2 & fin_bid_over_pd_max > 93
replace fin_bid_over_pd = final_bid / all_pds if final_too_big == 2 & fin_bid_over_pd_max > 93
replace fin_bid_over_pd_max = final_bid_max / all_pds if final_too_big == 2& fin_bid_over_pd_max > 93

replace all_pds = all_pds * 100 if PD_too_small == 2 & fin_bid_over_pd_max > 88
replace all_pds = all_pds * 10 if PD_too_small == 1& fin_bid_over_pd_max > 13
replace fin_bid_over_pd = final_bid / all_pds if ( PD_too_small == 2 & fin_bid_over_pd_max > 88) | (PD_too_small == 1 & fin_bid_over_pd_max > 13)
replace fin_bid_over_pd_max = final_bid_max / all_pds if (PD_too_small == 2 & fin_bid_over_pd_max > 88)| (PD_too_small == 1 & fin_bid_over_pd_max > 13)

//Dropping obviously too low values (less than 10% of pagu dana values)
drop if fin_bid_over_pd_max < 0.10

replace all_pds = . if all_pds == 0

gen final_bid_ratio1a = final_bid / pemlel_fin_harga_rp
gen final_bid_ratio2a = final_bid / pkb_ent_nilai_penawaran_rp
gen final_bid_ratio3a = final_bid / penun_det_harga_negosiasi_rp

//Correcting other bid variables to take into account earlier correction made to final_bid
replace pemlel_fin_harga_rp = pemlel_fin_harga_rp / 1000 if final_too_big == 3 & final_bid / pemlel_fin_harga_rp < .0015
replace pkb_ent_nilai_penawaran_rp = pkb_ent_nilai_penawaran_rp / 1000 if final_too_big == 3 & final_bid / pkb_ent_nilai_penawaran_rp < .0015

replace penun_det_harga_negosiasi_rp = penun_det_harga_negosiasi_rp / 1000 if final_bid / penun_det_harga_negosiasi_rp < 0.0015
replace penun_det_harga_negosiasi_rp = penun_det_harga_negosiasi_rp / 100 if final_bid/ penun_det_harga_negosiasi_rp < .015 

replace penun_det_harga_negosiasi_rp = . if uniq_pid == "2006_CTI_4376"
replace has_harga_neg = . if uniq_pid == "2006_CTI_4376"
replace num_harga_bids = 0 if uniq_pid == "2006_CTI_4376" 

drop final*ratio*a


//Use same cutoffs for other bid variables
replace pemlel_fin_harga_rp = . if pemlel_fin_harga_rp / all_pds < 0.1
replace pemlel_fin_harga_rp = . if pemlel_fin_harga_rp / all_pds > 10

replace pkb_ent_nilai_penawaran_rp = . if pkb_ent_nilai_penawaran_rp / all_pds < 0.1
replace pkb_ent_nilai_penawaran_rp = . if pkb_ent_nilai_penawaran_rp / all_pds > 10

  

//Re-generating from corrected variables which are used to determine the final_bid value
sort uniq_pid
gen has_nilai_pen_rp2 = (pkb_ent_nilai_penawaran_rp != .) & (pkb_ent_nilai_penawaran_rp != 0)
by uniq_pid: egen num_np_bids2 = total(has_nilai_pen_rp2)
replace num_np_bids = num_np_bids2
drop num_np_bids2

by uniq_pid: egen proj_final_bid2 = min(final_bid)
replace proj_final_bid = proj_final_bid2
drop proj_final_bid2


************************************************************************

bys uniq_pid: egen count_nego = count(penun_det_harga_negosiasi_rp)
bys uniq_pid: egen final_nego_rp = min(penun_det_harga_negosiasi_rp)
replace final_nego_rp = . if count_nego == 0
replace final_nego_rp = . if final_nego_rp == 0 




**Designated to set of variables to keep for subsequent use, "z_" prefix 

foreach x of var uniq_pid year all_pds n_pq_firms_all num_np_bids final_bid proj_final_bid final_nego_rp penun_det_ci_prov_cd {
	gen z_`x' = `x'
}

foreach x of var state_num state_name proc_type konsultan firm_name pemlel_fin_group_enc ba_an_ppt_date penun_det_tanggal_surat newsp_pub_date{ 
	gen z_`x' = `x'
}

rename pkb_ent_nilai_penawaran_rp z_bid  // add May17, 2022
label var z_bid 									"value of bid" // add May17, 2022
 
label var z_uniq_pid								"Unique Project ID" 
label var z_year									"Year"
label var z_all_pds									"Pagu Dana (price ceiling) Value" 
label var z_n_pq_firms_all							"Number of firms in project, from ppq & ppq2 tables"
label var z_num_np_bids								"Number of nilai penawaran bids in project"
label var z_final_bid								"Firm final bid value, if has_final_bid == 1" 
label var z_proj_final_bid							"Project final bid"
label var z_final_nego_rp							"Project final negotiated price"
label var z_state_num								"Province Number"
label var z_state_name								"Province Name"
label var z_proc_type								"Procurement Type, SEP or CTI" 
label var z_konsultan								"0/1: Konsultan project" 
label var z_firm_name								"Firm name"
label var z_pemlel_fin_group_enc					"Firm rank"
label var z_ba_an_ppt_date							"Bidding opening date"
label var z_penun_det_tanggal_surat					"Bidding winning date"
label var z_newsp_pub_date							"Newspaper publication date"
label var z_penun_det_ci_prov_cd					"Winning firm province"

order z_uniq_pid z_*

//Merging with updated firm names 
sort firm_name firm_name_match_var
merge firm_name firm_name_match_var using "../Data/Indonesia/firms_rev_new_fixed", uniqusing _merge(_aa)
tab _aa
drop if _aa == 2

//Correcting few mistaken missings
replace firmnamerev = "bragahdayasakti" if firm_name_match_var == "bragahdayasaktijlnusaindahiica/ponianak"
replace firmnamerev = "ganesengineeringconsultant" if firmnamerev == ""	


**Winning bidder won in 2004
egen firm_code = group(firmnamerev)
g temp_won_cont_in_2004 = 1 if year == 2004 & penun_det_ind == 1
bys firm_code: egen firm_won_cont_in_2004 = total(temp_won_cont_in_2004)
replace firm_won_cont_in_2004 = 1 if firm_won_cont_in_2004 > 1 & firm_won_cont_in_2004 != .
replace firm_won_cont_in_2004 = 0 if firm_won_cont_in_2004 != 1
	

gen temp_is_winner_won04 = 1 if firm_won_cont_in_2004 == 1 & pemlel_fin_group_enc == 1
replace temp_is_winner_won04 = 0 if firm_won_cont_in_2004 == 0 & pemlel_fin_group_enc == 1
bys uniq_pid: egen z_is_winner_won04 = mean(temp_is_winner_won04)
//3 projects have 0.5 value, 1 project in 2004 has value 0, set these to missing
replace z_is_winner_won04 = . if z_is_winner_won04 == 0.5
replace z_is_winner_won04 = . if z_is_winner_won04 == 0 & z_year == 2004

label var z_is_winner_won04 "Is winning bidder won04"



**Firm-level dataset
keep z_* uniq_pid firm_name firm_code firm_name_match_var firmnamerev // add firm_code (May 17 2022)
sort uniq_pid
save "../Data/Indonesia/z_all_firm_table", replace


**Project level dataset

keep z_* uniq_pid  
drop z_bid z_firm_name z_pemlel_fin_group_enc z_final_bid  // add z_bid (May 17 2022)

duplicates drop
codebook uniq_pid
sort uniq_pid

****
//Generate additional variables

//Date info

gen newsp_pub_month = regexs(1) if regexm(z_newsp_pub_date,"([a-zA-z]+)")
replace newsp_pub_month = "" if newsp_pub_month == "NA"
replace newsp_pub_month = "January" if newsp_pub_month == "Jan" | newsp_pub_month == "Januari" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 1
replace newsp_pub_month = "February" if newsp_pub_month == "Feb" | newsp_pub_month == "Februari" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 2
replace newsp_pub_month = "March" if newsp_pub_month == "Mar" | newsp_pub_month == "Maret" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 3
replace newsp_pub_month = "April" if newsp_pub_month == "Apr" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 4
replace newsp_pub_month = "May" if newsp_pub_month == "Mei" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 5
replace newsp_pub_month = "June" if newsp_pub_month == "Jun" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 6
replace newsp_pub_month = "July" if newsp_pub_month == "Jul" | newsp_pub_month == "Juli" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 7
replace newsp_pub_month = "August" if newsp_pub_month == "Aug" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 8
replace newsp_pub_month = "September" if newsp_pub_month == "Sep" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 9
replace newsp_pub_month = "October" if newsp_pub_month == "Oct" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 10
replace newsp_pub_month = "November" if newsp_pub_month == "Nov" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 11
replace newsp_pub_month = "December" if newsp_pub_month == "Desember" | newsp_pub_month == "Dec" | regexm(z_newsp_pub_date,"(^[0-9]+)/") == 12

gen newsp_pub_mon = 1 if newsp_pub_month == "January"
replace newsp_pub_mon = 2 if newsp_pub_month == "February"
replace newsp_pub_mon = 3 if newsp_pub_month == "March"
replace newsp_pub_mon = 4 if newsp_pub_month == "April"
replace newsp_pub_mon = 5 if newsp_pub_month == "May"
replace newsp_pub_mon = 6 if newsp_pub_month == "June"
replace newsp_pub_mon = 7 if newsp_pub_month == "July"
replace newsp_pub_mon = 8 if newsp_pub_month == "August"
replace newsp_pub_mon = 9 if newsp_pub_month == "September"
replace newsp_pub_mon = 10 if newsp_pub_month == "October"
replace newsp_pub_mon = 11 if newsp_pub_month == "November"
replace newsp_pub_mon = 12 if newsp_pub_month == "December"
drop newsp_pub_month

gen newsp_pub_year = "20" + regexs(1) if regexm(z_newsp_pub_date,"([0-9][0-9]$)")
gen newsp_pub_day = regexs(1) if regexm(z_newsp_pub_date,"(^[0-9]+)[ ]")
replace newsp_pub_day = regexs(1) if regexm(z_newsp_pub_date,"(^[0-9]+)/")
destring newsp_pub_year, replace
destring newsp_pub_day, replace

gen bid_open_month = regexs(1) if regexm(z_ba_an_ppt_date,"([a-zA-z]+)")
replace bid_open_month = "" if bid_open_month == "\N" | bid_open_month == "tanggal"
replace bid_open_month = "January" if bid_open_month == "Jan" | bid_open_month == "Januari"
replace bid_open_month = "February" if bid_open_month == "Feb" | bid_open_month == "Februari"
replace bid_open_month = "March" if bid_open_month == "Mar" | bid_open_month == "Maret"
replace bid_open_month = "April" if bid_open_month == "Apr"
replace bid_open_month = "May" if bid_open_month == "Mei"
replace bid_open_month = "June" if bid_open_month == "Jun"
replace bid_open_month = "July" if bid_open_month == "Jul" | bid_open_month == "Juli"
replace bid_open_month = "August" if bid_open_month == "Aug"
replace bid_open_month = "September" if bid_open_month == "Sep"
replace bid_open_month = "October" if bid_open_month == "Oct"
replace bid_open_month = "November" if bid_open_month == "Nov"
replace bid_open_month = "December" if bid_open_month == "Desember" | bid_open_month == "Dec"

gen bid_open_mon = 1 if bid_open_month == "January"
replace bid_open_mon = 2 if bid_open_month == "February"
replace bid_open_mon = 3 if bid_open_month == "March"
replace bid_open_mon = 4 if bid_open_month == "April"
replace bid_open_mon = 5 if bid_open_month == "May"
replace bid_open_mon = 6 if bid_open_month == "June"
replace bid_open_mon = 7 if bid_open_month == "July"
replace bid_open_mon = 8 if bid_open_month == "August"
replace bid_open_mon = 9 if bid_open_month == "September"
replace bid_open_mon = 10 if bid_open_month == "October"
replace bid_open_mon = 11 if bid_open_month == "November"
replace bid_open_mon = 12 if bid_open_month == "December"
drop bid_open_month


gen bid_open_year = "20" + regexs(1) if regexm(z_ba_an_ppt_date,"([0-9][0-9]$)")
gen bid_open_day = regexs(1) if regexm(z_ba_an_ppt_date,"(^[0-9]+)[ ]")
destring bid_open_year, replace
destring bid_open_day, replace

gen bid_win_month = regexs(1) if regexm(z_penun_det_tanggal_surat,"([a-zA-z]+)")
replace bid_win_month = "" if bid_win_month == "PEMENANG"
replace bid_win_month = "January" if bid_win_month == "Jan" | bid_win_month == "Januari" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 1
replace bid_win_month = "February" if bid_win_month == "Feb" | bid_win_month == "Februari" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 2
replace bid_win_month = "March" if bid_win_month == "Mar" | bid_win_month == "Maret" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 3
replace bid_win_month = "April" if bid_win_month == "Apr" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 4
replace bid_win_month = "May" if bid_win_month == "Mei" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 5
replace bid_win_month = "June" if bid_win_month == "Jun" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 6
replace bid_win_month = "July" if bid_win_month == "Jul" | bid_win_month == "Juli" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 7
replace bid_win_month = "August" if bid_win_month == "Aug" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 8
replace bid_win_month = "September" if bid_win_month == "Sep" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 9
replace bid_win_month = "October" if bid_win_month == "Oct" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 10
replace bid_win_month = "November" if bid_win_month == "Nov" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 11
replace bid_win_month = "December" if bid_win_month == "Desember" | bid_win_month == "Dec" | regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/") == 12

gen bid_win_mon = 1 if bid_win_month == "January"
replace bid_win_mon = 2 if bid_win_month == "February"
replace bid_win_mon = 3 if bid_win_month == "March"
replace bid_win_mon = 4 if bid_win_month == "April"
replace bid_win_mon = 5 if bid_win_month == "May"
replace bid_win_mon = 6 if bid_win_month == "June"
replace bid_win_mon = 7 if bid_win_month == "July"
replace bid_win_mon = 8 if bid_win_month == "August"
replace bid_win_mon = 9 if bid_win_month == "September"
replace bid_win_mon = 10 if bid_win_month == "October"
replace bid_win_mon = 11 if bid_win_month == "November"
replace bid_win_mon = 12 if bid_win_month == "December"
drop bid_win_month


gen bid_win_year = "20" + regexs(1) if regexm(z_penun_det_tanggal_surat,"([0-9][0-9]$)")
gen bid_win_day = regexs(1) if regexm(z_penun_det_tanggal_surat,"(^[0-9]+)[ ]")
replace bid_win_day = regexs(1) if regexm(z_penun_det_tanggal_surat,"(^[0-9]+)/")
destring bid_win_year, replace
destring bid_win_day, replace

//Correcting mistakes in year entry (checked against "year" and corresponding other bid_win/open values to determine year)
replace bid_win_year = 2008 if bid_win_year == 2028
replace bid_win_year = 2006 if bid_win_year == 2055
replace bid_win_year = 2006 if bid_win_year == 2009

replace bid_open_year = 2005 if bid_open_year == 2055
replace bid_open_year = 2007 if bid_open_year == 2027
replace bid_open_year = 2006 if bid_open_year == 2010


gen newsp_publ_date = mdy(newsp_pub_mon, newsp_pub_day, newsp_pub_year)
gen bid_win_date = mdy(bid_win_mon, bid_win_day, bid_win_year)
gen bid_open_date = mdy(bid_open_mon, bid_open_day, bid_open_year)

gen bid_mon_diff = bid_win_mon - bid_open_mon

gen z_auction_elapsed_time = bid_win_date - bid_open_date 
replace z_auction_elapsed_time = . if z_auction_elapsed_time < 0

gen z_newsp_award_elapsed_time = bid_win_date - newsp_publ_date 
replace z_newsp_award_elapsed_time = . if z_newsp_award_elapsed_time < 0
drop bid_open_year  bid_win_day bid_win_mon bid_win_year  // add bid_win_day bid_win_mon bid_win_year (May 17, 2022)


**1 percent trimming
summ z_n_pq_firms_all if z_konsultan == 0, d
gen trim_top_1p_all = r(p99) if z_konsultan == 0
gen trim_bot_1p_all = r(p1) if z_konsultan == 0

summ z_n_pq_firms_all if z_konsultan == 1, d
replace trim_top_1p_all = r(p99) if z_konsultan == 1
replace trim_bot_1p_all = r(p1) if z_konsultan == 1

gen trim_1p_all = ((z_n_pq_firms_all > trim_top_1p_all) | (z_n_pq_firms_all < trim_bot_1p_all)) & z_n_pq_firms_all != .
replace z_n_pq_firms_all = . if trim_1p_all == 1

drop trim_top_1p_all trim_bot_1p_all trim_1p_all
summ z_num_np_bids if z_konsultan == 0, d
gen trim_top_1p_all = r(p99) if z_konsultan == 0
gen trim_bot_1p_all = r(p1) if z_konsultan == 0

summ z_num_np_bids if z_konsultan == 1, d
replace trim_top_1p_all = r(p99) if z_konsultan == 1
replace trim_bot_1p_all = r(p1) if z_konsultan == 1

gen trim_1p_all = ((z_num_np_bids > trim_top_1p_all) | (z_num_np_bids < trim_bot_1p_all)) & z_num_np_bids != .
replace z_num_np_bids = . if trim_1p_all == 1
drop trim_top_1p_all trim_bot_1p_all trim_1p_all



sort uniq_pid

**************
g sep = 1 if z_proc_type == "SEP"
replace sep = 0 if z_proc_type == "CTI"
sort z_year z_state_num

gen y2004 = (z_year == 2004)
gen y2005 = (z_year == 2005)
gen y2006 = (z_year == 2006)
gen y2007 = (z_year == 2007)
gen y2008 = (z_year == 2008)
			
g eproc_start_year = .
replace eproc_start_year = 2006 if z_state_name == "BALI" | z_state_num == 22
replace eproc_start_year = 2008 if z_state_name == "BANGKA BELITUNG"  | z_state_num == 30
replace eproc_start_year = 2005 if z_state_name == "BANTEN" | z_state_num == 29
replace eproc_start_year = 2007 if z_state_name == "BENGKULU" | z_state_num == 26
replace eproc_start_year = 2005 if z_state_name == "DI YOGYAKARTA"  | z_state_num == 4
replace eproc_start_year = 2004 if z_state_name == "DKI JAKARTA" | z_state_num == 1
replace eproc_start_year = 2006 if z_state_name == "GORONTALO" | z_state_num == 31
replace eproc_start_year = 2008 if z_state_name == "IRIAN JAYA PAPUA" | z_state_num == 25
replace eproc_start_year = 2008 if z_state_name == "IRJA BARAT" | z_state_num == 33
replace eproc_start_year = 2007 if z_state_name == "JAMBI" | z_state_num == 10
replace eproc_start_year = 2005 if z_state_name == "JAWA BARAT" | z_state_num == 2
replace eproc_start_year = 2005 if z_state_name == "JAWA TENGAH" | z_state_num == 3
replace eproc_start_year = 2005 if z_state_name == "JAWA TIMUR" | z_state_num == 5
replace eproc_start_year = 2008 if z_state_name == "KALIMANTAN BARAT" | z_state_num == 13
replace eproc_start_year = 2007 if z_state_name == "KALIMANTAN SELATAN" | z_state_num == 15
replace eproc_start_year = 2008 if z_state_name == "KALIMANTAN TENGAH" | z_state_num == 14
replace eproc_start_year = 2006 if z_state_name == "KALIMANTAN TIMUR" | z_state_num == 16
replace eproc_start_year = 2008 if z_state_name == "KEP RIAU" | z_state_num == 32
replace eproc_start_year = 2007 if z_state_name == "LAMPUNG" | z_state_num == 12
replace eproc_start_year = 2008 if z_state_name == "MALUKU" | z_state_num == 21
replace eproc_start_year = 2008 if z_state_name == "MALUKU UTARA" | z_state_num == 28
replace eproc_start_year = 2007 if z_state_name == "NANGGROE ACEH DARUSSALAM" | z_state_num == 6
replace eproc_start_year = 2007 if z_state_name == "NTB" | z_state_num == 23
replace eproc_start_year = 2008 if z_state_name == "NTT" | z_state_num == 24
replace eproc_start_year = 2004 if z_state_name == "PUSAT" | z_state_num == 0
replace eproc_start_year = 2007 if z_state_name == "RIAU" | z_state_num == 9
replace eproc_start_year = 2009 if z_state_name == "SELAWESI BARAT" | z_state_num == 34
replace eproc_start_year = 2006 if z_state_name == "SULAWESI SELATAN" | z_state_num == 19
replace eproc_start_year = 2008 if z_state_name == "SULAWESI TENGAH" | z_state_num == 18
replace eproc_start_year = 2008 if z_state_name == "SULAWESI TENGGARA" | z_state_num == 20
replace eproc_start_year = 2007 if z_state_name == "SULAWESI UTARA" | z_state_num == 17
replace eproc_start_year = 2006 if z_state_name == "SUMAERA SELATAN" | z_state_num == 11
replace eproc_start_year = 2006 if z_state_name == "SUMATERA BARAT" | z_state_num == 8
replace eproc_start_year = 2006 if z_state_name == "SUMATERA UTARA" | z_state_num == 7
	 
assert eproc_start_year != . if z_state_name != "" | z_state_num != .


***Info on which province winner from (translate official province code to corresponding state_num)
gen winner_state_num = .
replace winner_state_num = 1  if z_penun_det_ci_prov_cd == "31"
replace winner_state_num = 2  if z_penun_det_ci_prov_cd == "32"	
replace winner_state_num = 3  if z_penun_det_ci_prov_cd == "33"
replace winner_state_num = 4  if z_penun_det_ci_prov_cd == "34"
replace winner_state_num = 5  if z_penun_det_ci_prov_cd == "35"	
replace winner_state_num = 6  if z_penun_det_ci_prov_cd == "11"	
replace winner_state_num = 7  if z_penun_det_ci_prov_cd == "71"		
replace winner_state_num = 8  if z_penun_det_ci_prov_cd == "76"	
replace winner_state_num = 9  if z_penun_det_ci_prov_cd == "14"	
replace winner_state_num = 10 if z_penun_det_ci_prov_cd == "15"	
replace winner_state_num = 11 if z_penun_det_ci_prov_cd == "16"	
replace winner_state_num = 12 if z_penun_det_ci_prov_cd == "18"	
replace winner_state_num = 13 if z_penun_det_ci_prov_cd == "61"	
replace winner_state_num = 14 if z_penun_det_ci_prov_cd == "62"	
replace winner_state_num = 15 if z_penun_det_ci_prov_cd == "63"	
replace winner_state_num = 16 if z_penun_det_ci_prov_cd == "64"	
replace winner_state_num = 17 if z_penun_det_ci_prov_cd == "71"	
replace winner_state_num = 18 if z_penun_det_ci_prov_cd == "72"	
replace winner_state_num = 19 if z_penun_det_ci_prov_cd == "73"	
replace winner_state_num = 20 if z_penun_det_ci_prov_cd == "74"	
replace winner_state_num = 21 if z_penun_det_ci_prov_cd == "81"	
replace winner_state_num = 22 if z_penun_det_ci_prov_cd == "51"
replace winner_state_num = 23 if z_penun_det_ci_prov_cd == "52"	
replace winner_state_num = 24 if z_penun_det_ci_prov_cd == "53"	
replace winner_state_num = 25 if z_penun_det_ci_prov_cd == "94"	
replace winner_state_num = 26 if z_penun_det_ci_prov_cd == "17"	 	
replace winner_state_num = 28 if z_penun_det_ci_prov_cd == "82"	
replace winner_state_num = 29 if z_penun_det_ci_prov_cd == "36"	
replace winner_state_num = 30 if z_penun_det_ci_prov_cd == "19"			
replace winner_state_num = 31 if z_penun_det_ci_prov_cd == "75"	
replace winner_state_num = 32 if z_penun_det_ci_prov_cd == "21"	
replace winner_state_num = 33 if z_penun_det_ci_prov_cd == "91"	
replace winner_state_num = 34 if z_penun_det_ci_prov_cd == "76"	

gen z_is_winner_in_prov = (winner_state_num == z_state_num) if (winner_state_num != . & z_state_num != .)

g eproc_on = (z_year >= eproc_start_year) if eproc_start_year!= .

	
g lnpagudana = ln(z_all_pds)
  
* Log Total budget for state-year, overall and by konsultan
bys z_state_num z_year: egen double z_totalstateyearbudget = total(z_all_pds)
g lnstateyearbudget = ln(z_totalstateyearbudget)


bys z_state_num z_year z_konsultan: egen double z_totalstateyearbudget_kons = total(z_all_pds)
g lnstateyearbudget_kons = ln(z_totalstateyearbudget_kons)
  
* Total projects for state-year, overall and by konsultan
bys z_state_num z_year: egen double z_stateyearprojects = count(z_all_pds)
bys z_state_num z_year z_konsultan: egen double z_stateyearprojects_kons = count(z_all_pds)

    
	
* Create log versions of variables
g lnbid = ln(z_proj_final_bid)
g lnfinalnegobid = ln(z_final_nego_rp)
replace lnfinalnegobid = lnbid if lnfinalnego == .

	
label var lnbid "Log minimum bid recorded"
label var lnfinalnegobid "Log final negotiated price, with min bid price if missing"
label var lnstateyearbudget "Log of total budget in state year from pagu dana"
label var z_stateyearprojects "Total number of projects in state year"
label var lnstateyearbudget_kons "Log of total budget in state year from pagu dana, split by consultant"
label var z_stateyearprojects_kons "Total number of projects in state year, split by consultant"


save "../Data/Indonesia/z_all_project_table", replace

merge 1:m z_uniq_pid using "../Data/Indonesia/z_all_firm_table"
sort z_uniq_pid
by z_uniq_pid: g n = _n 
sum lnfinalnegobid z_n_pq_firms_all z_num_np_bids z_newsp_award_elapsed_time z_auction_elapsed_time if z_konsultan == 0 & n == 1

egen contract_code = group(uniq_pid) 

* bid ranking 
sort uniq_pid z_bid z_line_id  
by uniq_pid: g z_bid_rank = _n if z_bid < . 
g winner = 0 if z_bid < .  
replace winner = 1 if z_final_bid != .

g min_win =. 
replace min_win = 0 if z_bid_rank != 1 & z_final_bid != . 
replace min_win = 1 if z_bid_rank == 1 & z_final_bid != . 
label var min_win "Whether a winner is the bidder offers minimum price or not (1　=　yes, 0　=　no) "

g temp_final_bid = . 
replace temp_final_bid = 1 if (min_win == 1 | min_win == 0)
br uniq_pid z_bid z_final_bid winner 

g percent_final_bid = z_final_bid / z_all_pds 
sum  percent_final_bid, detail
hist percent_final_bid if percent_final_bid <= 1
*201 observation -> final bid is above the price celling value (1%)

* define running variable 
forvalues x = 1(1)4{
	cap drop temp 
	gen temp = z_bid if z_bid_rank == `x'
	bys uniq_pid: egen z_bid`x' = max(temp)
	}

	gen delta = (z_bid - z_bid1)/z_all_pds          if z_bid_rank > 1 
	replace delta = (z_bid - z_bid2)/z_all_pds      if z_bid_rank == 1 
	
	gen delta_placebo = (z_bid - z_bid2)/z_all_pds          if z_bid_rank > 2 
	replace delta_placebo = (z_bid - z_bid3)/z_all_pds      if z_bid_rank == 2 
	
	*add delta3 Sep 2, 2024 
	gen delta3 = (z_bid - z_bid1)/z_bid1          if z_bid_rank > 1 
	replace delta3 = (z_bid - z_bid2)/z_bid1      if z_bid_rank == 1 
	
	gen delta3_placebo = (z_bid - z_bid2)/z_bid1          if z_bid_rank > 2 
	replace delta3_placebo = (z_bid - z_bid3)/z_bid1      if z_bid_rank == 2 
	
g temp_price = z_bid if z_final_bid !=. 

cap drop temp_final_bid 
by uniq_pid: egen temp_final_bid = max(temp_price) 
format z_bid temp_final_bid %30.0g

cap drop temp 
g temp =. 
replace temp = 0 if z_bid <  temp_final_bid 
replace temp = 1 if z_bid >= temp_final_bid & z_bid <.
sort uniq_pid z_bid z_line_id  
br  uniq_pid z_bid temp
by uniq_pid: replace temp = temp[_n-1] + temp[_n]  if _n > 1  
rename temp z_final_rank 

cap drop z_bid1-z_bid4 
forvalues x = 1(1)4{
	cap drop temp 
	gen temp = z_bid if z_final_rank == `x'
	bys uniq_pid: egen z_bid`x' = max(temp)
	}

	
	gen delta2 = (z_bid - z_bid1)/z_all_pds          if z_final_rank > 1 
	replace delta2 = (z_bid - z_bid2)/z_all_pds      if z_final_rank == 1 

	gen delta2_placebo = (z_bid - z_bid2)/z_all_pds          if z_final_rank > 2 
	replace delta2_placebo = (z_bid - z_bid3)/z_all_pds      if z_final_rank == 2 

cap drop z_bid1-z_bid4 
	
label var delta "running variable(minimum price)"
label var delta2 "running variable(winner's bid price)"
label var delta3 "running variable(minimum price normaized by minimum price)"
label var lnbid "Log minimum bid recorded"
label var lnfinalnegobid "Log final negotiated price, with min bid price if missing" 

sort contract_code z_bid 

*hist delta if abs(delta) < .5 & z_konsultan == 0 , graphregion(color(white)) title ("All", size(huge)) xlabel(,labsize(huge)) xtitle("{&Delta}", size(huge) margin(medium)) yscale(off) color(gs12) bin(200)
*graph export "figures/Indonesia/indonesia_delta.png", width(4096) replace

rename bid_win_date bid_date 
rename firm_code firm_id 
rename contract_code contract_id 
rename z_final_bid win_bid 

************* backlog construction module *****************
do "backlog_construction"	
backlog_construction "win_bid" "winner"
************************************************

************* fowardlog construction module *****************
do "forwardlog_construction"	
fowardlog_construction "win_bid" "winner"
************************************************


sum lnfinalnegobid z_n_pq_firms_all z_num_np_bids z_newsp_award_elapsed_time z_auction_elapsed_time if z_konsultan == 0 & n == 1

save "../Data/Indonesia/xyz.dta", replace 
erase "../Data/Indonesia/z_all_firm_table.dta"
erase "../Data/Indonesia/z_all_project_table.dta"




